library(xml2)
library(httr)
## Warning: package 'httr' was built under R version 4.2.3
library(rvest)
## Warning: package 'rvest' was built under R version 4.2.3
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Fetch the DataTrail demo page and print the text of every <strong> element.
r <- read_html(
  "https://datatrail-jhu.github.io/stable_website/webscrape.html"
)
s <- html_nodes(r, css = "strong")
# NOTE(review): the variable `t` shares its name with base::t(); kept as-is
# because later code may refer to it.
t <- s %>% html_text()
print(t)
## [1] "rvest"        "httr"         "dbplyr"       "jsonlite"     "googlesheets"
# Scrape the link texts carrying the ".media__link" class from the BBC
# front page.
r2 <- read_html("https://www.bbc.com")
# The CSS selector can be found with a browser extension
# (presumably SelectorGadget -- the original note was cut off; confirm).
s2 <- html_nodes(r2, css = ".media__link")
t2 <- s2 %>% html_text()
# The raw text is padded with whitespace; strip both ends for display only
# (t2 itself is left untrimmed).
print(trimws(t2, which = "both"))
##  [1] "Biden defends sending cluster bombs to Ukraine"                     
##  [2] "Why is US giving Ukraine 'abhorrent' weapons?"                      
##  [3] "Zelensky visits Snake Island as war enters 500th day"               
##  [4] "Ashes news: Build-up to crucial day of third England-Australia Test"
##  [5] "Dutch government collapses over asylum row"                         
##  [6] "Can France prevent tensions igniting again?"                        
##  [7] "AI robot asked 'will you rebel against humans'?"                    
##  [8] "No charges for security who blocked Britney Spears"                 
##  [9] "Murray unsure he has motivation for Wimbledon return"               
## [10] "England are in a 'winnable' position - Moeen"                       
## [11] "Van der Sar in intensive care after bleed on brain"                 
## [12] "The islands gifted as an unpaid debt"                               
## [13] "Sixteen of the best films of 2023"                                  
## [14] "The maps revealing urban heat stress"                               
## [15] "Pressure builds on S Korea to send Ukraine stockpiled ammo"         
## [16] "Coco Lee death sparks China mental health discussion"               
## [17] "What Asian fans did for Taylor Swift concert tickets"               
## [18] "Australian welfare hunt caused suicides - inquiry"                  
## [19] "Why Wimbledon’s dress code is so strict"                            
## [20] "What would green shipping look like?"                               
## [21] "Rava upma: warm and savoury semolina"                               
## [22] "The rise of job-searching burnout"                                  
## [23] "Why America could be overtaken as the corn superpower"              
## [24] "Bruce Springsteen settles an old score in Hyde Park"                
## [25] "'Almost every influencer will be hopping on Threads'"               
## [26] "The latest technology news direct to your inbox"                    
## [27] "Theatres tempt new audiences with virtual reality"                  
## [28] "BBC visits Belarus camp offered to Wagner"                          
## [29] "BBC visits Belarus camp offered to Wagner"                          
## [30] "Hail batters Spain creating icy urban scenes"                       
## [31] "At the scene the day after fatal Wimbledon..."                      
## [32] "Belarus leader pressed on nuclear weapons"                          
## [33] "Where is Yevgeny Prigozhin? And why does it..."                     
## [34] "Are wildfires in the US getting worse?"                             
## [35] "Australians smash Tina Turner dancing world..."                     
## [36] "Watch: Europe’s last Ariane-5 rocket blasts..."                     
## [37] "One-minute World News"                                              
## [38] "Best in show: Africa's top shots"                                   
## [39] "Young adults see rise in severe distress - study"                   
## [40] "BBC star 'accused of paying teen for explicit photos'"              
## [41] "Shipping agrees net-zero goal but critics unmoved"                  
## [42] "Malmö is Swedish city chosen to host Eurovision"                    
## [43] "Canada stops advertising with Facebook in news row"                 
## [44] "World records hottest day for third time in a week"                 
## [45] "Why this music producer smashed his platinum discs"                 
## [46] "Why Europe's 'lonely' tech entrepreneurs need help"                 
## [47] "In pictures: King Charles III celebrations"                         
## [48] "Mystery of Holocaust escape girls finally solved"                   
## [49] "Eid al-Adha around the world in pictures"                           
## [50] "Photographer shares shots of famous musicians"                      
## [51] "Millinery masterpieces at Royal Ascot"
##################################################

# Query the GitHub REST API for a user's repositories and tabulate each
# repository's name and web address.
gitresp <- GET("https://api.github.com/users/abidalishaikh/repos")
# Fail fast on an HTTP error (e.g. rate limiting): an error payload is not a
# list of repositories, so the field extraction below would break on it.
stop_for_status(gitresp)
gitcontent <- content(gitresp)

# Build one single-row tibble per repository, then stack them.
# `tibble()` replaces `data_frame()`, which was deprecated in tibble 1.1.0;
# `[[` extracts by exact name (no partial matching on the parsed list).
lapply(gitcontent, function(x) {
  tibble(
    repo = x[["name"]],
    address = x[["html_url"]]
  )
}) %>%
  bind_rows()
## # A tibble: 22 × 2
##    repo                address                                             
##    <chr>               <chr>                                               
##  1 07_RegressionModels https://github.com/AbidAliShaikh/07_RegressionModels
##  2 AbidAliShaikh       https://github.com/AbidAliShaikh/AbidAliShaikh      
##  3 courses             https://github.com/AbidAliShaikh/courses            
##  4 Distill             https://github.com/AbidAliShaikh/Distill            
##  5 edoc                https://github.com/AbidAliShaikh/edoc               
##  6 education           https://github.com/AbidAliShaikh/education          
##  7 ExData_Plotting1    https://github.com/AbidAliShaikh/ExData_Plotting1   
##  8 fastdjango          https://github.com/AbidAliShaikh/fastdjango         
##  9 ggplot              https://github.com/AbidAliShaikh/ggplot             
## 10 gitSurveys          https://github.com/AbidAliShaikh/gitSurveys         
## # … with 12 more rows
##########################################

# Download the FiveThirtyEight steak/risk survey and parse it as CSV.
surv <- GET("https://raw.githubusercontent.com/fivethirtyeight/data/master/steak-survey/steak-risk-survey.csv")
# Abort on an HTTP error instead of parsing an error page as if it were CSV.
stop_for_status(surv)
# Stating the encoding explicitly avoids httr's "No encoding supplied:
# defaulting to UTF-8" fallback.
# NOTE(review): the recorded output shows the first data row holding the
# literal "Response" sub-header (RespondentID is NA); it is kept as-is here.
df_surv <- content(surv, type = "text/csv", encoding = "UTF-8")
## Rows: 551 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (14): Consider the following hypothetical situations: <br>In Lottery A, ...
## dbl  (1): RespondentID
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Auto-print the parsed survey tibble to inspect it.
df_surv
## # A tibble: 551 × 15
##    RespondentID Consid…¹ Do yo…² Do yo…³ Do yo…⁴ Have …⁵ Do yo…⁶ Have …⁷ Do yo…⁸
##           <dbl> <chr>    <chr>   <chr>   <chr>   <chr>   <chr>   <chr>   <chr>  
##  1           NA Response Respon… Respon… Respon… Respon… Respon… Respon… Respon…
##  2   3237565956 Lottery… <NA>    <NA>    <NA>    <NA>    <NA>    <NA>    <NA>   
##  3   3234982343 Lottery… No      Yes     No      No      No      No      Yes    
##  4   3234973379 Lottery… No      Yes     Yes     No      Yes     Yes     Yes    
##  5   3234972383 Lottery… Yes     Yes     Yes     No      Yes     Yes     Yes    
##  6   3234958833 Lottery… No      Yes     No      No      Yes     Yes     Yes    
##  7   3234955240 Lottery… No      No      No      No      Yes     No      Yes    
##  8   3234955097 Lottery… No      Yes     No      No      Yes     Yes     No     
##  9   3234955010 Lottery… No      Yes     Yes     Yes     Yes     No      Yes    
## 10   3234953052 Lottery… Yes     Yes     Yes     No      Yes     No      Yes    
## # … with 541 more rows, 6 more variables:
## #   `How do you like your steak prepared?` <chr>, Gender <chr>, Age <chr>,
## #   `Household Income` <chr>, Education <chr>,
## #   `Location (Census Region)` <chr>, and abbreviated variable names
## #   ¹​`Consider the following hypothetical situations: <br>In Lottery A, you have a 50% chance of success, with a payout of $100. <br>In Lottery B, you have a 90% chance of success, with a payout of $20. <br><br>Assuming you have $10 to bet, would you play Lottery A or Lottery B?`,
## #   ²​`Do you ever smoke cigarettes?`, ³​`Do you ever drink alcohol?`,
## #   ⁴​`Do you ever gamble?`, ⁵​`Have you ever been skydiving?`, …
# Alternatively, the same CSV can be read directly with readr::read_csv().

# Download a page to disk so it can be scraped offline.
# mode = "wb" writes the response byte-for-byte; the default text mode on
# Windows rewrites line endings in the saved file.
download.file(
  url = "https://www.ibm.com/",
  destfile = "ibm.html",
  mode = "wb"
)
# Parse the saved copy rather than hitting the site again.
root_nod1 <- rvest::read_html("ibm.html")
# #root_html <- rvest::read_html(root_nod1,"html")
# body_nod <- rvest::read_html(root_nod1,"body")
# p_nod <- rvest::read_html(body_nod,"p")
# p_content <- read_text(p_nod)
########################
# Collect every <table> element of the saved page and convert each one to a
# data frame; the result is a list with one entry per table found.
table_nod <- rvest::html_nodes(root_nod1, css = "table")
c_dataframe <- table_nod %>% html_table()
c_dataframe
## list()